import pandas as pd  # 将pandas作为第三方库导入，我们一般为pandas取一个别名叫做pd
import os

# Print wide DataFrames without wrapping their columns across several blocks.
pd.set_option('expand_frame_repr', False)

# Directory that is searched recursively for CSV files.
file_location = r'../../data/basic-trading-data/stock_data/'

# Path of every ``.csv`` file found anywhere under ``file_location``,
# in the order ``os.walk`` reports them.
file_list = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(file_location)
    for name in names
    if name.endswith('.csv')
]

# print(file_list)
# Read the first 300 CSV files (in sorted path order) and stack them into a
# single table. Each file is decoded as GBK and its first line is skipped,
# so the second line of the file supplies the column names.
#
# The frames are collected in a list and concatenated once:
# ``DataFrame.append`` was removed in pandas 2.0, and calling it inside the
# loop re-copied all previously loaded rows on every iteration.
frames = []
for fp in sorted(file_list)[:300]:
    df = pd.read_csv(fp, skiprows=1, encoding='gbk')
    frames.append(df)

# ``pd.concat`` raises ValueError on an empty list; fall back to an empty
# frame so the result matches what the old append loop produced when no
# CSV files were found.
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Order the rows by the '交易日期' column first, then by '股票代码' within ties.
all_data = all_data.sort_values(by=['交易日期', '股票代码'])
# print(all_data)

# Store the combined table in an HDF5 file (mode 'w' replaces any existing
# file), then load it back from disk and print the reloaded copy.
hdf_path = r'../../data/basic-trading-data/stock_data/a_stock.h5'
hdf_key = 'all_data'

all_data.to_hdf(hdf_path, key=hdf_key, mode='w')
all_data = pd.read_hdf(hdf_path, key=hdf_key)
print(all_data)

# =====导入数据
# df = pd.read_csv(
#     filepath_or_buffer=r'../../data/basic-trading-data/stock_data/sh600000.csv',
#     encoding='gbk'
# )